Experiment 4

Each subject saw 192 trials, being 3 cycles of 64 trials. There were 40 subjects. There were 7680 rows in the full data.

Trial Procedure

First an instruction was presented until subjects dismissed it with a key press. Then a grey screen was presented for 1 second. Then a prime was presented for 2 seconds. Then a grey screen was presented for 1 second. Then the squares with dots were presented until the subject responded, with a 10 second timeout if they did not respond.

Mountain View
Flow of experiment 4.

Describe the data

Here is a description of each column in the raw data.

Here is a table of instructions.

vagueness selection instruction
crisp match Choose a square with the same number of dots as the target
crisp compare Choose a square with fewer dots than the target
vague match Choose a square with about the same number of dots as the target
vague compare Choose a square with far fewer dots than the target
Item dots
Item 1 (6,15,24)
Item 2 (16,25,34)
Item 3 (26,35,44)
Item 4 (36,45,54)

Source additional R files with functions for computing confidence intervals.

source("CIfunctions.R")

Get data from the raw per-subject files if necessary.

if (file.exists('data.txt')) {
    dat <- read.delim(file="data.txt", header=T, stringsAsFactors=FALSE)
} else {
    system(wait=TRUE, paste("head -n 1 ", "../experimentCode","/output/", "subject01.data ", "> ", "data.txt", sep=""))
    system(wait=TRUE, paste("tail -q -n +2 ", "../experimentCode", "/output/", "*.data ",  ">> ", "data.txt", sep=""))
}

# declare local variables
Selection_of_valid_subjects <- 40
Selection_of_rows <- 7680
Selection_of_trials_per_subject <- Selection_of_rows / Selection_of_valid_subjects # 192

# sort out borderline responses into expected near and far
# How many squares in the square they chose? 
dat$RESPONSE <- as.character(dat$key)
for (row in 1 : nrow(dat) ) {
  switch(dat[row,'RESPONSE'],
         'L' = {dat[row, 'choice'] <- dat[row, 'Lft']},
         'M' = {dat[row, 'choice'] <- dat[row, 'Mid']},
         'R' = {dat[row, 'choice'] <- dat[row, 'Rgt']}         
  )
}
dat$Item <- dat$Itm
dat$Itm <- NULL
dat$crossed=paste('Con',dat$Cnd,':Quan',dat$Qty,':Item',dat$Item,sep="")

dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseExpected'] <-  6 
dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseNear']     <- 15  
dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseFar']      <- 24

dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseFar']      <- 34

dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseFar']      <- 44

dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseFar']      <- 54

dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseNear']     <- 15 
dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseFar']      <-  6

dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseFar']      <- 16

dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseFar']      <- 26

dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseFar']      <- 36

dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseExpected'] <-  6 
dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseNear']     <- 15  
dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseFar']      <- 24

dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseFar']      <- 34

dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseFar']      <- 44

dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseFar']      <- 54

dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseNear']     <- 15 
dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseFar']      <-  6

dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseFar']      <- 16

dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseFar']      <- 26

dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseFar']      <- 36

dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseExpected'] <-  6 
dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseNear']     <- 15  
dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseFar']      <- 24

dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseFar']      <- 34

dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseFar']      <- 44

dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseFar']      <- 54

dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseNear']     <- 15 
dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseFar']      <-  6

dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseFar']      <- 16

dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseFar']      <- 26

dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseFar']      <- 36

dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseExpected'] <-  6 
dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseNear']     <- 15  
dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseFar']      <- 24

dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseFar']      <- 34

dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseFar']      <- 44

dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseFar']      <- 54

dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseNear']     <- 15 
dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseFar']      <-  6

dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseNear']     <- 25  
dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseFar']      <- 16

dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseNear']     <- 35
dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseFar']      <- 26

dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseNear']     <- 45
dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseFar']      <- 36

dat$isResponseExpected <- dat$choice == dat$ResponseExpected
dat$isResponseNear <- dat$choice == dat$ResponseNear
dat$isResponseFar <- dat$choice == dat$ResponseFar

for ( row in 1:nrow(dat) ) {
  dat[row, 'switchResponse'] <- 
    ifelse(dat[row, 'isResponseExpected']==TRUE, 'Expected', 
           ifelse(dat[row, 'isResponseNear']==TRUE, 'Near', 'Far') ) 
}

# SUBJECT
# ensure Subject is a factor 
dat$Subject=factor(paste("s",sprintf("%02d",dat$Sub),sep=""))

# TRIAL
# Trial for subject, 1 to 192
dat$Trial = rep(x = 1:Selection_of_trials_per_subject, times = Selection_of_valid_subjects)

# make a centred Trial for modeling
dat$c_Trl <-dat$Trial - mean(dat$Trial)

# make a scaled Trial for modelling
dat$s_Trl <- as.numeric(scale(dat$Trial))

# ID
# id is a unique identifier for the 7680 row data
dat$id <- factor(paste(paste(dat$Subject), 
                       paste("t", sprintf("%03d", dat$Trial), sep="") , sep=":"))

# ITEM
# create a centred numeric item variable for modeling
dat$c_Itm <- ifelse(dat$Item==1, -.75, 
                    ifelse(dat$Item==2, -.25,
                           ifelse(dat$Item==3, .25, .75)))


# make Item be a factor and assign labels
dat$Item <- factor(dat$Item, levels=c(1,2,3,4), labels=c("06:15:24", "16:25:34", "26:35:44", "36:45:54"))

# add ratios for Item
item_range_ratio = c(6 / 24, 16 / 34, 26 / 44, 36 / 54) 
# 0.2500000 0.4705882 0.5909091 0.6666667
item_range_ratio_scaled = as.vector(scale(c(6 / 24, 16 / 34, 26 / 44, 36 / 54)))
# -1.3441995 -0.1316642  0.5297187  0.9461450
item_mean_ratio = c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54))) 
# 0.5125000 0.6876471 0.7691558 0.8166667
item_mean_ratio_scaled = as.vector(scale(c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54))))) 
# -1.37582241 -0.06614191  0.54334858 0.89861574

dat[dat$Item == "06:15:24", "item_range_ratio"] <-  0.2500000
dat[dat$Item == "16:25:34", "item_range_ratio"] <-  0.4705882
dat[dat$Item == "26:35:44", "item_range_ratio"] <-  0.5909091
dat[dat$Item == "36:45:54", "item_range_ratio"] <-  0.6666667

dat[dat$Item == "06:15:24", "item_range_ratio_scaled"] <-  -1.3441995
dat[dat$Item == "16:25:34", "item_range_ratio_scaled"] <-  -0.1316642
dat[dat$Item == "26:35:44", "item_range_ratio_scaled"] <-   0.5297187  
dat[dat$Item == "36:45:54", "item_range_ratio_scaled"] <-   0.9461450

dat[dat$Item == "06:15:24", "item_mean_ratio"] <-  0.5125000 
dat[dat$Item == "16:25:34", "item_mean_ratio"] <-  0.6876471 
dat[dat$Item == "26:35:44", "item_mean_ratio"] <-  0.7691558
dat[dat$Item == "36:45:54", "item_mean_ratio"] <-  0.8166667

dat[dat$Item == "06:15:24", "item_mean_ratio_scaled"] <-   -1.37582241 
dat[dat$Item == "16:25:34", "item_mean_ratio_scaled"] <-   -0.06614191 
dat[dat$Item == "26:35:44", "item_mean_ratio_scaled"] <-    0.54334858 
dat[dat$Item == "36:45:54", "item_mean_ratio_scaled"] <-    0.89861574

# VAGUENESS
# Create a factor coding for Vagueness
dat[ dat$Cnd==1 , 'Vagueness'] <- 'Vague'
dat[ dat$Cnd==2 , 'Vagueness'] <- 'Crisp'
dat[ dat$Cnd==3 , 'Vagueness'] <- 'Vague'
dat[ dat$Cnd==4 , 'Vagueness'] <- 'Crisp'
dat$Vagueness <- as.factor(dat$Vagueness)

# manually center Vagueness
dat$c_Vag <- ifelse(dat$Vagueness=='Crisp', -.5, .5)

dat$Selection = ""
dat[dat$Cnd %in% c(1,2), "Selection"] <- "matching"
dat[dat$Cnd %in% c(3,4), "Selection"] <- "comparison"
dat$Selection.sum <- ifelse(dat$Selection=="matching", .5, -.5)
dat$Selection <- as.factor(dat$Selection)

# remove redundant confusing columns
dat$Sub <- NULL
dat$Trl <- NULL

# ORDER
# give the levels of Order meaningful names
dat$Order <- factor(dat$Ord, levels=c(1,2), labels=c('LtoR','RtoL'))

# make a manually centred Order
dat$c_Ord <- ifelse(dat$Order=="LtoR",-.5,.5)

# QUANTITY
# give the levels of Quantity meaningful names
dat$Quantity <- factor(dat$Qty, levels=c(1,2), labels=c('Small','Large'))

# make a manually centred Quantity
dat$c_Qty <- ifelse(dat$Quantity=="Small",-.5,.5)

# INSTRUCTION
# add Selection of characters in the instruction # 29 30 34 36 38
dat$nchar_instr = nchar(as.character(dat$Ins))
dat$nchar_instr_scaled = as.vector(scale(nchar(as.character(dat$Ins)), scale=TRUE))

# make Instruction be a factor (17 levels)
dat$Ins <- as.factor(dat$Ins) 

# RT
# add transformations of RT
dat$RT <- dat$rt; dat$rt <-  NULL
dat$RT_RecSqd    <- 1/dat$RT^2
dat$RT_Rec       <- 1/dat$RT
dat$RT_RecSqt    <- 1/sqrt(dat$RT) 
dat$RT_log       <- log(dat$RT)
dat$RT_sqt       <- sqrt(dat$RT)
dat$RT_raw       <- dat$RT
dat$RT_sqd       <- dat$RT^2 

# lose impossible trials
dd <- dat
dd$RT[dd$RT > 9000 ] <- NA
dd <- dd[complete.cases(dd),]
row.names(dd) <- NULL
  
# add preceding RT: because we removed impossible trials, the value for preceding RT for a trial following an impossible trial is the value of the trial that preceded the impossible trial.
dd$RTprev <- NA
for (s in levels(dd$Subject)) {
  nrows = nrow(dd[dd$Subject==s,])
  for (i in 1:nrows) {
    if (i==1) {
      dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i]
    }
    else
      dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i-1]
  }
}

# add transformations of previous RT
dd$RTprev_RecSqd    <- 1/dd$RTprev^2
dd$RTprev_Rec       <- 1/dd$RTprev
dd$RTprev_RecSqt0   <- 1/sqrt(dd$RTprev) 
dd$RTprev_RecSqt    <- as.vector(scale( 1/sqrt(dd$RTprev)  ))
dd$RTprev_log       <- log(dd$RTprev)
dd$RTprev_sqt       <- sqrt(dd$RTprev)
dd$RTprev_raw       <- dd$RTprev
dd$RTprev_sqd       <- dd$RTprev^2

save(dd, file="data.Rda")

Compare distributions of the various transformations of RT against random samples from normal distributions with the same mean and sd to see which transformations best approximate normal distributions.

## plot the comparison of the transformations against their normal equivalents to see distributional properties using full data

# first create a data frame specially for transformations
# this won't be used for analysis, just for this plot
dat_transforms <- dat

# dfSubset says whether any data points were removed
dat_transforms$dfSubset <- "original"

# RTtype says whether the row contains data for an observed RT or 
# a sample from a normal distribution with the same mean and sd
dat_transforms$RTtype <- "observed"

# create extra rows for dfs with removed (outlier) data points
dat1 <- dat_transforms                                       # nrow 7680
dat2 <- subset(dat_transforms, RT<9000)              # nrow 7677
dat2$dfSubset <- "RT<9000"

# bring the dfs back into main df
dat_transforms <- rbind(dat1,dat2)
dat_transforms$dfSubset <- factor(dat_transforms$dfSubset, levels=c("original", "RT<9000"))

# create extra rows for normally distributed transformed variables
dato = dat_transforms
datn = dat_transforms; datn$RTtype <- "normal"
dat_transforms <- rbind(dato,datn)
dat_transforms$RTtype <- factor(dat_transforms$RTtype, levels=c("observed","normal"))

# for safety
dat_transforms[dat_transforms$RTtype=="normal", c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd")] <- NA

# Create the normally distributed transformed variables, replacing the NAs declared above
for (k in levels(dat_transforms$dfSubset)){
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqd"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_RecSqd"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqd"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_RecSqd"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_Rec"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_Rec"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_Rec"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_Rec"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqt"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_RecSqt"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqt"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_RecSqt"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_log"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_log"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_log"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_log"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqt"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_sqt"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqt"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_sqt"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_raw"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_raw"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_raw"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_raw"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqd"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k    & dat_transforms$RTtype=="observed", "RT_sqd"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqd"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_sqd"]))
}

temp <- reshape2::melt(data=dat_transforms, 
             id.vars=c("dfSubset", "RTtype", "Item", "Vagueness", "Selection", "Quantity", "Order"),
             measure.vars=c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd"))

ggplot(temp, 
             aes(value, colour=RTtype)) + 
    geom_freqpoly() + 
  facet_wrap(dfSubset~variable, scales="free", nrow=2) +
  theme_bw() + theme(aspect.ratio=1) + scale_color_manual(values=c("blue","red"))

Show how transformations of RT affect distribution of times, and how they affect which times are outliers.

# boxplot subjects and items
load("data.Rda")
# allow reference to transformation
subdata = melt(dd,
               measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log", "RT_raw")) 
# boxplots for subjects and items separately for each dependent variable
ggplot(subdata) + 
  facet_wrap(~ variable, scales="free", ncol=4) +
  geom_boxplot(aes(y=value, x=Item, col="Items")) + 
  geom_boxplot(aes(y=value, x=Subject, col="Subjects")) + 
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1), aspect.ratio=1, panel.grid=element_blank()) + 
  scale_colour_manual("",values=c("Items"="red","Subjects"="blue")) +
  xlab("") + ylab("")

Show how mean times for individual subjects and items vary with respect to the grand mean RT.

# Show how mean times for individual subjects and items vary with respect to the grand mean RT.
subs=ddply(dd, .(level=Subject), summarise, effect="subject", emean=mean(dd$RT), lmean=mean(RT), ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0,"slow","fast"))
itms=ddply(dd, .(level=Item), summarise, effect="item", emean=mean(dd$RT), lmean=mean(RT), ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0,"slow","fast"))
si=rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))

my1 <- xyplot(data=si, lmean~level | effect, 
              aspect=1,
              layout=c(2,1),
              ylim = extendrange(si$lmean, f = 0.10),
              par.settings=list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19), superpose.line=list(col = c("blue","red")))),
              groups=si$lsign,
              xlab=NULL,
              ylab="RT (in ms)",
              key=list(text=list(levels(si$lsign)), lines=list(lty=c(1,1), col=trellis.par.get('superpose.line')$col, pch=c(19,19), type='o'), divide=1),
              mean=unique(si$emean),
              scales=list(x=list(relation="free", at=list(15.5, 32.5), labels=list("subject number", "item identifier"), cex=1 ), 
                          y=list(at=c(0, 2000, unique(si$emean), 4000, 6000, 8000,  10000), labels=c(0, 2000,  "Mean", 4000,  6000, 8000, 10000), cex=.9) ),
              panel=function(x,y,type,subscripts,groups=groups,...){
                panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
                panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y, groups=groups, subscripts=subscripts, code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
                panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-200,y+200)), as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-200,y+200)), as.character(si$level)[subscripts][panel.number()==2], cex=.75)
              })

my2 <- xyplot(data=si, ldiff~level | effect, 
              aspect=1, type='n',
              ylab="Deviation from mean RT (in ms)",
              ylim=extendrange(si$ldiff,f=0.10))

print(doubleYScale(my1, my2, add.axis=TRUE, add.ylab2=TRUE, use.style=FALSE))

Show how practice and fatigue effects vary over subjects

# Show how practice/fatigue effects vary over subjects
ggplot(dd, aes(y=RT_log, x=Trial)) + facet_wrap(~Subject,nrow=3) + theme(panel.grid=element_blank(), panel.margin = unit(0, "lines"), panel.background= element_rect(fill = 'white', colour='black' ), legend.position="top") + scale_x_continuous("Trial", breaks=NULL) + geom_point(pch=19, cex=.25, alpha=.5) + stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + theme(aspect.ratio = 1) 

Show practice and fatigue effects vary over items.

# Show how practice/fatigue effects vary over items
ggplot(dd, aes(y=RT_log, x=Trial)) + facet_grid(~Item) + theme(panel.grid=element_blank(), panel.margin = unit(0, "lines"), panel.background= element_rect(fill = 'white', colour='black' ), legend.position="top") + scale_x_continuous("Trial") + geom_point(pch=19, cex=.25, alpha=.5) + stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + theme(aspect.ratio = 1) 

Show means.

Main effects

# prep main effects rts data frame
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness")));names(vrts)[3]<-"Levels"
nrts <- cbind(Effect="Selection",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection")));names(nrts)[3]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Quantity")));names(qrts)[3]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Order")));names(orts)[3]<-"Levels"
irts <- cbind(Effect="Item",x=factor(c(1,2,3,4)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Item")));names(irts)[3]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci

# plot main effects
ggplot(rts, aes(y=RT_log, x=Levels, group=1, ymin=mins, ymax=maxs)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Effect, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) 

Main effects over items

vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Vagueness")));names(vrts)[4]<-"Levels"
nrts <- cbind(Effect="Selection",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Selection")));names(nrts)[4]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Quantity")));names(qrts)[4]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Order")));names(orts)[4]<-"Levels"

rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci

rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))

ggplot(rts, aes(y=RT_log, x=item_mean_ratio, group=Levels, ymin=mins, ymax=maxs, shape=Levels, fill=Levels, col=Levels)) +
  scale_shape_manual(values=c(21,21,22,22,23,23,24,24)) +
  scale_fill_manual(values=c("black","gray","black","gray","black","gray","black","gray")) +
  scale_color_manual(values=c("black","gray","black","gray","black","gray","black","gray")) +
  geom_line() +
  geom_errorbar(width=.01) +
  geom_point(cex=3) + 
  facet_grid(~Effect) + 
  theme_bw() +
  theme(aspect.ratio=1, 
        panel.grid=element_blank()) +
  scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1)) 

2 way interactions ignoring item

# prep 2 ways rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Selection")); names(rts1)[1] <- "F1"; names(rts1)[2] <- "F2"
rts1$Effect <- rep("Vagueness:Selection",4)
rts2 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Quantity")); names(rts2)[1] <- "F1"; names(rts2)[2] <- "F2"
rts2$Effect <- rep("Vagueness:Quantity",4)
rts3 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Order")); names(rts3)[1] <- "F1"; names(rts3)[2] <- "F2"
rts3$Effect <- rep("Vagueness:Order",4)
rts5 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection","Quantity")); names(rts5)[1] <- "F1"; names(rts5)[2] <- "F2"
rts5$Effect <- rep("Selection:Quantity",4)
rts6 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection","Order")); names(rts6)[1] <- "F1"; names(rts6)[2] <- "F2"
rts6$Effect <- rep("Selection:Order",4)
rts8 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Quantity","Order")); names(rts8)[1] <- "F1"; names(rts8)[2] <- "F2"
rts8$Effect <- rep("Quantity:Order",4)
rts <- rbind(rts1,rts2,rts3,rts5,rts6,rts8)
rts <- subset(rts, select=c(Effect, F1, F2, RT_log, RT_log_norm, sd, se, ci))
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci
# plot 2 way interactions
ggplot(rts, aes(y=RT_log, x=F2, group=F1, ymin=mins, ymax=maxs, pch=F1)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_wrap(~Effect, scale="free_x", nrow=1) + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank()) +
  xlab("Levels")

Plot 2 way interactions over items

# prep 2 ways over items rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Selection")); names(rts1)[2] <- "F1"; names(rts1)[3] <- "F2"
rts1$E1 <- "Vagueness"
rts1$E2 <- "Selection"
rts2 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Quantity")); names(rts2)[2] <- "F1"; names(rts2)[3] <- "F2"
rts2$E1 <- "Vagueness"
rts2$E2 <- "Quantity"
rts3 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Order")); names(rts3)[2] <- "F1"; names(rts3)[3] <- "F2"
rts3$E1 <- "Vagueness"
rts3$E2 <- "Order"
rts4 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Quantity", "Selection")); names(rts4)[2] <- "F1"; names(rts4)[3] <- "F2"
rts4$E1 <- "Quantity"
rts4$E2 <- "Selection"
rts5 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Order","Selection")); names(rts5)[2] <- "F1"; names(rts5)[3] <- "F2"
rts5$E1 <- "Order"
rts5$E2 <- "Selection"
rts6 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Quantity","Order")); names(rts6)[2] <- "F1"; names(rts6)[3] <- "F2"
rts6$E1 <- "Quantity"
rts6$E2 <- "Order"

rts <- rbind(rts1,rts2,rts3,rts4,rts5,rts6)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci

rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
rts$F3 <- factor(paste(rts$F1,rts$F2))

rts$E1 <- factor(rts$E1, levels=c("Selection","Order","Quantity","Vagueness"))
rts$E2 <- factor(rts$E2, levels=c("Selection","Order","Quantity","Vagueness"))

# The palette with black:
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

ggplot(rts, aes(y=RT_log, x=item_mean_ratio, group=F3, col=F2, pch=F1)) + 
  facet_grid(E2~E1, drop=F) + 
  geom_point(cex=3) + geom_line() + theme_bw() + 
  theme(aspect.ratio=1, panel.grid=element_blank()) +
  scale_color_manual(name="colour",values=cbbPalette) +
  scale_shape_discrete(name="shape") +
  scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))

RT model all items

# backing off A LOT from maximal model to avoid warning about maxfun
rtm <- lme4::lmer(
  data=dd,
  RT_log ~ 
    c_Vag * Selection.sum + item_mean_ratio + c_Qty + c_Ord + s_Trl +
    RTprev_log  + 
  ( c_Vag * Selection.sum | Subject)
)
rtm.summary <- summary(rtm)
rtm.confint <- confint(rtm, method='Wald')[13:20,]

Print a table of coefficients.

Estimate Std. Error t value
(Intercept) 5.99 0.10 61.1
c_Vag -0.02 0.01 -1.6
Selection.sum 0.19 0.02 10.7
item_mean_ratio 0.43 0.04 11.5
c_Qty -0.00 0.01 -0.4
c_Ord 0.01 0.01 1.5
s_Trl -0.07 0.00 -16.7
RTprev_log 0.13 0.01 12.4
c_Vag:Selection.sum 0.14 0.03 5.3

Plot coefficients

par(las=2, mar=c(14,6,1,1))
y=coef(rtm.summary)[-1,1] # omit intercept
n=length(y)
plot(y, xaxt="n", xlab="", ylab="RT_log\n", pch=19, ylim=extendrange(y, f=.20), main="Coefficients")
abline(h=0)
axis(1, labels=names(y) , at=1:n)
grid()
segments(x0=1:n, x1=1:n, y0=rtm.confint[,1], y1=rtm.confint[,2], lwd=2)

Plot partial effects

par(mfrow=c(2,4))
plotLMER.fnc(rtm)
## effect size (range) for  c_Vag is  0.08472117 
## effect size (range) for  Selection.sum is  0.2597492 
## effect size (range) for  item_mean_ratio is  0.1308804 
## effect size (range) for  c_Qty is  0.003404283 
## effect size (range) for  c_Ord is  0.01320353 
## effect size (range) for  s_Trl is  0.2538381 
## effect size (range) for  RTprev_log is  0.4746599

suppressPackageStartupMessages(require(languageR))
cat("k is:",
collin.fnc(dd[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])$cnumber)
## k is: 33.88255
suppressPackageStartupMessages(require(rms))
par(mar=c(1.1,3.1,1.1,1.1), pty='s')
plot(varclus(as.matrix(dd[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])))

Model criticism baayen plots

# Baayen 4-plot model criticism

par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd$rstand = as.vector(scale(resid(rtm)))
# plot scaled residuals density
plot(density(dd$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd$rstand, cex=.5)
qqline(dd$rstand)
# plot standardised residuals versus fitted values
plot(dd$rstand ~ fitted(rtm), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))

Refit model with outliers removed (3.5 sd)

dd2 <- subset(dd, abs(rstand)<3.5 )
# backing off A LOT from maximal model to avoid warning about maxfun
rtm <- lmer(
  data=dd2,
  RT_log ~ 
    c_Vag * Selection.sum + item_mean_ratio + c_Qty + c_Ord + s_Trl +
    RTprev_log  + 
  ( c_Vag * Selection.sum | Subject)
)
rtm.summary <- summary(rtm)
rtm.confint <- confint(rtm, method='Wald')

Print a table of coefficients.

Estimate Std. Error df t value Pr(>|t|)
(Intercept) 5.98 0.10 473.8 61.7 0.000
c_Vag -0.02 0.01 54.7 -1.6 0.115
Selection.sum 0.19 0.02 39.0 10.7 0.000
item_mean_ratio 0.44 0.04 7531.2 11.8 0.000
c_Qty -0.01 0.01 7530.7 -0.7 0.481
c_Ord 0.02 0.01 7530.6 1.9 0.062
s_Trl -0.07 0.00 7543.0 -17.0 0.000
RTprev_log 0.13 0.01 7587.3 12.5 0.000
c_Vag:Selection.sum 0.14 0.03 40.1 5.3 0.000

Plot coefficients

par(las=2, mar=c(14,6,1,1))
y=coef(rtm.summary)[-1,1] # omit intercept
n=length(y)
plot(y, xaxt="n", xlab="", ylab="RT_log\n", pch=19, ylim=extendrange(y, f=.20), main="Coefficients")
abline(h=0)
axis(1, labels=names(y) , at=1:n)
grid()
segments(x0=1:n, x1=1:n, y0=rtm.confint[13:20,1], y1=rtm.confint[13:20,2], lwd=2)

Plot partial effects

par(mfrow=c(2,4))
plotLMER.fnc(rtm)
## effect size (range) for  c_Vag is  0.08361262 
## effect size (range) for  Selection.sum is  0.2614438 
## effect size (range) for  item_mean_ratio is  0.1326846 
## effect size (range) for  c_Qty is  0.006014844 
## effect size (range) for  c_Ord is  0.01593027 
## effect size (range) for  s_Trl is  0.2560567 
## effect size (range) for  RTprev_log is  0.4740353

suppressPackageStartupMessages(require(languageR))
cat("k is:",
collin.fnc(dd2[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])$cnumber
)
## k is: 33.87351
suppressPackageStartupMessages(require(rms))
par(mar=c(1.1,3.1,1.1,1.1), pty='s')
plot(varclus(as.matrix(dd2[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])))

Model criticism baayen plots

# Baayen 4-plot model criticism

par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd2$rstand = as.vector(scale(resid(rtm)))
# plot scaled residuals density
plot(density(dd2$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd2$rstand, cex=.5)
qqline(dd2$rstand)
# plot standardised residuals versus fitted values
plot(dd2$rstand ~ fitted(rtm), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))

Separate analyses for vague vs crisp at each level of selection (matching, and comparison)

not done yet.

Estimate Std. Error df t value Pr(>|t|)
(Intercept) 5.98 0.10 473.8 61.7 0.000
c_Vag -0.02 0.01 54.7 -1.6 0.115
Selection.sum 0.19 0.02 39.0 10.7 0.000
item_mean_ratio 0.44 0.04 7531.2 11.8 0.000
c_Qty -0.01 0.01 7530.7 -0.7 0.481
c_Ord 0.02 0.01 7530.6 1.9 0.062
s_Trl -0.07 0.00 7543.0 -17.0 0.000
RTprev_log 0.13 0.01 7587.3 12.5 0.000
c_Vag:Selection.sum 0.14 0.03 40.1 5.3 0.000

Borderline analysis

not done yet.